
* ---------------------------------------------------
* Event History Analysis
* Josef Brüderl, March 2011
* Entry into Motherhood (data from ALLBUS 2000)
* ---------------------------------------------------

* duration: age at birth of first child - 14 (measured in years only)
* child: =1 if child, =0 if censored at time of interview
* educ: years of education
* east: =1 if born in East Germany, =0 if born in West Germany
* coh: birth cohort dummies

* Run the whole script under Stata 11 command interpretation, whatever release is installed.
version 11
* Machine-specific working directory: every section below loads Motherhood.dta
* via a relative path, so the data file is expected in this folder.
* Adjust the path when running the script on another computer.
cd "K:\Vorlesung EHA\Stata Beispiele\"    //working directory


***********************************************
* -----------------------------------
*     Discrete Time EHA
* -----------------------------------
***********************************************

***********************************************
*   I) PERSON-PERIOD Episode Splitting
***********************************************
* Load the data, declare it as survival-time data and split every woman's
* single episode into one record per year ("person-period" format).
use Motherhood.dta, clear
stset duration, failure(child==1) id(persnr)
stsplit T0, every(1)                         // T0 = start of each one-year split

* Time-varying covariate: 1 while the split starts no later than
* (years of education + 6 - 14) on the process-time axis, 0 afterwards.
generate ineduc = (T0 <= educ + 6 - 14)
generate lnt    = ln(_t)

* After stsplit the censoring indicator _d is the dependent variable of the
* discrete-time models, and _t (1, 2, 3, ...) is the time variable.

* Right-hand side shared by all three models (_t and lnt carry the
* linear-logistic time dependence). The local must be run together with
* the model commands that use it.
local rhs educ ineduc east coh2 coh3 coh4 coh5 _t lnt

* Linear-Logistic model
streg      `rhs', distribution(exponential)   // continuous time
logit   _d `rhs', or                          // discrete time
* The logit note that 4 "failures" are completely determined means that
* 4 splits share identical covariate values and have only 0 on _d.
* In logit terminology a "failure" is a 0 on the dependent variable.

* Complementary log-log model
* In theory this model reproduces a PH model best; with one dummy per time
* point it is the grouped Cox model. In practice it is seldom used.
* Note that Cloglog-LinLog is closer to the continuous-time LinLog (also a
* PH model) than Logit-LinLog is (the latter is a non-PH model).
cloglog _d `rhs', eform


***********************************************
*   II) EXPAND Episode Splitting
***********************************************
* Purpose: show that person-period data built by hand with -expand- give the
* same discrete-time logit estimates as data built with -stsplit-.
use Motherhood.dta, clear

* --- Reference model: episode splitting by stsplit -------------------------
* preserve/restore brackets this part so the hand-made version below starts
* again from the untouched one-record-per-woman data.
preserve
stset duration, id(persnr) failure(child==1)
stsplit T0, every(1)                    //"person-period" episode splitting
gen ineduc = T0 <= (educ+6-14)          //the time-varying covariate
gen lnt = ln(_t)
logit _d educ ineduc east coh2 coh3 coh4 coh5 _t lnt //LinLog
est store LogitI
restore

* --- Same data built by hand with expand -----------------------------------
* stset excludes records whose duration is missing or not positive, whereas
* expand would silently keep each of them as a single record (with t = 1).
* Drop them first so both approaches are estimated on the same sample.
drop if missing(duration) | duration <= 0

expand duration                                  //one record per year at risk
bysort persnr: gen t = _n                        //time variable (1, 2, 3, ...)
gen d = 0                                        //failure indicator
* Only the last record of a woman can carry the event, and only if she
* actually had a child (child==1); censored women keep d = 0 throughout.
bysort persnr (t): replace d=1 if child==1 & t==_N
* t-1 is the start of the yearly interval, i.e. the counterpart of T0 above.
gen ineduc = t-1 <= (educ+6-14)                  //the time-varying covariate
gen lnt = ln(t)
logit d educ ineduc east coh2 coh3 coh4 coh5 t lnt //LinLog
est store LogitII

* Side-by-side comparison of the covariate effects. The time terms are left
* out of keep() because they are named differently (_t vs. t) in the two models.
estimates table LogitI LogitII, b(%9.2f) t(%9.2f) ///
                equations(1) keep(educ ineduc east coh2 coh3 coh4 coh5) 



***********************************************
*     Conditional Effect Plots
***********************************************
* Rebuild the person-period data and fit the two linear-logistic models
* whose predictions are compared in the plots below.
use Motherhood.dta, clear
stset duration, failure(child==1) id(persnr)
stsplit T0, every(1)                         // "person-period" episode splitting
generate ineduc = (T0 <= educ + 6 - 14)      // time-varying covariate
generate lnt    = ln(_t)

* Common right-hand side; the local must be run together with the two
* estimation commands that use it.
local rhs educ ineduc east coh2 coh3 coh4 coh5 _t lnt

* Continuous-time Linear-Logistic model (coefficients rather than hazard ratios)
streg `rhs', distribution(exponential) nohr
estimates store ContLinLog

* Discrete-time Logit-Linear-Logistic model
logit _d `rhs'
estimates store LogitLinLog

* Overall rates: predicted rate over time for one fixed covariate profile,
* once from the continuous and once from the discrete model.
preserve

* Fix every covariate at the reference profile; _t and lnt keep their
* observed values, so the predictions vary with time only.
replace educ   = 13            // abitur
replace ineduc = 0             // out of school
replace east   = 0             // West German
replace coh2   = 1             // cohort 2 ...
foreach v of varlist coh3 coh4 coh5 {
    replace `v' = 0            // ... hence all other cohort dummies are 0
}

estimates restore ContLinLog
predict r1, hazard             // continuous LinLog: predicted hazard
estimates restore LogitLinLog
predict r2, pr                 // Logit-LinLog: predicted probability (the default)

twoway (line r1 _t if _t<31, sort connect(J) lwidth(thick) lcolor(blue))      ///
       (line r2 _t if _t<31, sort connect(J) lwidth(thick) lcolor(red)),      ///
    title("Comparing continuous and discrete LinLog", size(large))            ///
    xtitle("age - 14", size(large)  margin(0 0 0 2))                          ///
    xlabel(0(5)30, labsize(medlarge))                                         ///
    ytitle("fertility rate", size(large))                                     ///
    ylabel(0(.02)0.1, format(%3.2f) labsize(medlarge) angle(horizontal) grid) ///
    legend(order(1 "Cont. LinLog" 2 "Logit-LinLog")                           ///
           rows(2) size(medlarge) position(1) ring(0))                        ///
    xsize(4.5)

restore                        // discard the modified covariates and predictions

* East-West: same comparison as for the overall rates, but east is left at
* its observed value so each model yields one curve per region.
preserve

replace educ   = 13            // abitur
replace ineduc = 0             // out of school
replace coh2   = 1             // cohort 2 ...
foreach v of varlist coh3 coh4 coh5 {
    replace `v' = 0            // ... hence all other cohort dummies are 0
}

estimates restore ContLinLog
predict r1, hazard             // continuous LinLog: predicted hazard
estimates restore LogitLinLog
predict r2, pr                 // Logit-LinLog: predicted probability (the default)

twoway (line r1 _t if east==0&_t<31, sort connect(J) lwidth(thick) lcolor(blue))     ///
       (line r1 _t if east==1&_t<31, sort connect(J) lwidth(thick) lcolor(red))      ///
       (line r2 _t if east==0&_t<31, sort connect(J) lwidth(thick) lcolor(green))    ///
       (line r2 _t if east==1&_t<31, sort connect(J) lwidth(thick) lcolor(dkgreen)), ///
    title("Comparing continuous and discrete LinLog", size(large))                   ///
    xtitle("age - 14", size(large)  margin(0 0 0 2))                                 ///
    xlabel(0(5)30, labsize(medlarge))                                                ///
    ytitle("fertility rate", size(large))                                            ///
    ylabel(0(.025)0.15, format(%4.3f) labsize(medlarge) angle(horizontal) grid)      ///
    legend(order(1 "Cont. West" 2 "Cont. East" 3 "Logit West" 4 "Logit East")        ///
           rows(4) size(medium) position(1) ring(0))                                 ///
    xsize(4.5)

restore                        // discard the modified covariates and predictions



***********************************************
* -----------------------------------
*     Further Topics in EHA
* -----------------------------------
***********************************************


****************************************
* Frailty Model
****************************************
* Single-record data: one episode per woman, no episode splitting here.
use Motherhood.dta, clear
stset duration, failure(child==1)

* Log-logistic model without frailty; hazard curves for West (east=0) and
* East (east=1), drawn over 0-30 years of process time.
streg educ east coh2 coh3 coh4 coh5, distribution(loglogistic)
stcurve, hazard at1(east=0) at2(east=1) range(0 30)                         ///
    lwidth(thick thick)                                                     ///
    title("Log-logistic model hazard")                                      ///
    xtitle("age - 14", size(large) margin(0 0 0 2))                         ///
    xlabel(0(5)30, labsize(medium))                                         ///
    ytitle("fertility rate", size(large))                                   ///
    ylabel(0(0.025)0.175, angle(0) grid labsize(medium) format(%5.3f))      ///
    legend(order(1 "West" 2 "East") rows(2) position(1) ring(0)             ///
           size(medlarge))                                                  ///
    xsize(4.5)

* Same model with gamma-distributed frailty; -unconditional- requests the
* population hazard instead of the hazard conditional on the frailty.
streg educ east coh2 coh3 coh4 coh5, distribution(loglogistic) frailty(gamma)
stcurve, hazard unconditional at1(east=0) at2(east=1) range(0 30)           ///
    lwidth(thick thick)                                                     ///
    title("Log-logistic/gamma model population hazard", span)               ///
    xtitle("age - 14", size(large) margin(0 0 0 2))                         ///
    xlabel(0(5)30, labsize(medium))                                         ///
    ytitle("fertility rate", size(large))                                   ///
    ylabel(0(0.025)0.175, angle(0) grid labsize(medium) format(%5.3f))      ///
    legend(order(1 "West" 2 "East") rows(2) position(1) ring(0)             ///
           size(medlarge))                                                  ///
    xsize(4.5)


****************************************
* Duration Dependence
****************************************
* Compare three log-logistic specifications on the same data.
use Motherhood.dta, clear

* Cap the process time: durations of 40 or more are set to 39, so that
* t(max) is defined (for reasons of comparison).
replace duration = 39 if duration >= 40 & !missing(duration)
stset duration, failure(child==1)

* Covariates shared by all three models; the local must be run together
* with the streg commands that use it.
local rhs educ east coh2 coh3 coh4 coh5

* Standard log-logistic model
streg `rhs', distribution(loglogistic)
estimates store LogLog

* Log-logistic model with gamma frailty
streg `rhs', distribution(loglogistic) frailty(gamma)
estimates store LogGamma

* Log-logistic model whose ancillary parameter depends on educ and east
streg `rhs', distribution(loglogistic) ancillary(educ east)
estimates store LogAncill

* Fit statistics of the three models in one table
estimates stats LogLog LogGamma LogAncill


****************************************
* Timing- and Intensity Effects
****************************************
* Non-parametric hazard by education level, East Germans only.
use Motherhood.dta, clear
stset duration, failure(child==1)

* abi = 0 for up to 12.5 years of education, 1 for 13 or more years.
* NOTE(review): values strictly between 12.5 and 13 match neither rule and
* are copied to abi unchanged; confirm that educ has no such values.
recode educ (min/12.5=0) (13/max=1), generate(abi)

* Smoothed hazard estimates for both education groups (bandwidth 2 each),
* shown up to process time 30.
sts graph if east==1, hazard by(abi) tmax(30) width(2 2)                  ///
    plot1opts(lwidth(thick)) plot2opts(lwidth(thick))                     ///
    title("East-Germany (non-parametric hazard)")                         ///
    xtitle("age - 14", size(large) margin(0 0 0 2))                       ///
    xlabel(0(5)30, labsize(medium))                                       ///
    ytitle("fertility rate", size(large))                                 ///
    ylabel(0(0.05)0.2, angle(0) grid labsize(medium) format(%4.2f))       ///
    legend(order(1 "below Abi" 2 "Abi") rows(2) position(1) ring(0)       ///
           size(medlarge))                                                ///
    xsize(4.5)

